#!/usr/bin/env escript
%%! -pa ../bc gentab bc bc/gentab

%% vim: set filetype=erlang :

%% Copyright (c) 2013-2014 Cloudozer LLP. All rights reserved.
%% 
%% Redistribution and use in source and binary forms, with or without
%% modification, are permitted provided that the following conditions are met:
%% 
%% * Redistributions of source code must retain the above copyright notice, this
%% list of conditions and the following disclaimer.
%% 
%% * Redistributions in binary form must reproduce the above copyright notice,
%% this list of conditions and the following disclaimer in the documentation
%% and/or other materials provided with the distribution.
%% 
%% * Redistributions in any form must be accompanied by information on how to
%% obtain complete source code for the LING software and any accompanying
%% software that uses the LING software. The source code must either be included
%% in the distribution or be available for no more than the cost of distribution
%% plus a nominal fee, and must be freely redistributable under reasonable
%% conditions.  For an executable file, complete source code means the source
%% code for all modules it contains. It does not include source code for modules
%% or files that typically accompany the major components of the operating
%% system on which the executable file runs.
%% 
%% THIS SOFTWARE IS PROVIDED BY CLOUDOZER LLP ``AS IS'' AND ANY EXPRESS OR
%% IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
%% MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE
%% DISCLAIMED. IN NO EVENT SHALL CLOUDOZER LLP BE LIABLE FOR ANY DIRECT,
%% INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
%% (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
%% LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
%% ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
%% (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
%% SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-mode(compile).

-include_lib("../ling_code.hrl").

%% in words
-define(LOADED_MODULES_SIZE, 500000).

main([BeamSrc,BifTabFile,IopVarsTab]) ->

%% BIFs table is the source of "forced" instruction specializations.
%% If implementation reference is "@" in the BIFs table then a separate
%% opcode must be allocated without regard to frequencies or space
%% savings.

	BifTab = bifs_tab(BifTabFile),

	ForcedVars = [{call_bif,[{b,{M,F,length(As)}}]}
					|| {M,F,As,Impl} <- BifTab, Impl =:= "@"],

	Files = find_beams(BeamSrc),

	OpNodes = lists:foldl(fun(File, OpNodes) ->
		io:format("~s~n", [File]),
		{ok,#m{code=Asm}} = ling_code:beam_to_ling(File),

		SaneAsm = [{Op,[sane(A) || A <- Args]}
			|| {Op,Args,_} <- Asm, Op =/= func_info, Op =/= label, Op =/= line],

		%% func_info is suppressed as its analysis
		%% is long while the result is obvious

		update_opnodes(SaneAsm, OpNodes)
	end, dict:new(), Files),

	%% dump opcodes from iops.tab that never occured
	case lists:filter(fun(Op) ->
			dict:find(Op, OpNodes) =:= error
		end, iops_tab:opcodes()) of
	[] ->
		ok;
	Xs ->
		io:format("*** The following opcode(s) present"
							" in iops_tab:opcodes() never occured:~n", []),
		lists:foreach(fun(Op) ->
			io:format("~w~n", [Op])
		end, Xs)
	end,

	OpNodes1 = [{Op,dict:to_list(SNs)}
		 || {Op,SNs} <- dict:to_list(OpNodes)],

	%% reinject func_info with trivial stats
	OpNodes2 = [{func_info,
				 [{[{a,ok},{a,ok},{u,1}],1}]}|OpNodes1],

	GrandTotal = lists:sum([lists:sum([N
		 || {_,N} <- SNs]) || {_,SNs} <- OpNodes2]),
	io:format("GrandTotal=~w~n", [GrandTotal]),

	Vss = [
		
		begin
			Starters0 = [{S,N / GrandTotal} || {S,N} <- SNs],
			Starters = fix_byte_args(Starters0, Op),
			io:format("~w[~p].~n", [Op,length(Starters)]),

			%% Get arguments specializations that must appear without
			%% regard to frequencies and space savings for a given Op
			%%
			MustHaves = [S || {Op1,S} <- ForcedVars, Op1 =:= Op],

			%% The formula balances the space saving in the loaded moduls code
			%% and the increase of the emulator code.
			%%
			Threshold = vcs(Op) /?LOADED_MODULES_SIZE /4, %% VCS/LMS/4
			
			{Vs0,Residue} = variants(Starters, MustHaves, Threshold),

			Vs = add_base_variant(Vs0, Op, Residue),
			io:format("~p.~n", [Vs]),

			Nos = lists:seq(0, length(Vs)-1),
			[{F,Op,N,D} || {{F,D},N} <- lists:zip(Vs, Nos)]
		end

			|| {Op,SNs} <- OpNodes2
	],

	Variants = lists:concat(Vss),
	
	{ok,Out} = file:open(IopVarsTab, [write]),
	io:format(Out, "%% autogenerated by iopvars_gen\n", []),
	io:format(Out, "~p.\n", [Variants]),
	file:close(Out),

	io:format("~s updated\n", [IopVarsTab]),
	io:format("To improve code further, retrieve the iop counters\n"
			  "using COUNT_IOPS and put the output to hot_cold_iops.\n"
			  "Then run 'make reorder'\n", []);

main(_) ->
	io:format("usage: iopvars_gen beam.src bifs.tab iopvars.tab~n").

sane({f,_}) -> label;
sane({fu,_}) -> funentry;
sane({str,_}) -> string;
sane({'catch',_}) -> 'catch';
sane({float,_}) -> float;
sane({literal,_}) -> literal;
sane({bigint,_}) -> bigint;
sane(nil) -> nil;
sane({_,_}=A) -> A;
sane(U) when is_integer(U), U >= 0 -> {u,U}.

update_opnodes(Asm, OpNodes0) ->
	lists:foldl(fun({Op,Args}, OpNodes) ->
		dict:update(Op, fun(Node) ->
			dict:update_counter(Args, 1, Node)
		end, dict:store(Args, 1, dict:new()), OpNodes)
	end, OpNodes0, Asm).

fix_byte_args(SFs, Op) ->
	Types = iops_tab:arg_types(Op),
	case [S || {S,_} <- SFs, length(S) =/= length(Types)] of
	[] ->
		ok;
	[X|_] ->
		erlang:error({nargs_mismatch,Op,X,Types})
	end,

	[
		{lists:map(fun({{u,U},u8}) ->
			{u8,U};
		({Arg,_}) ->
			Arg
		end, lists:zip(S, Types)),F}

			|| {S,F} <- SFs
	].
	
add_base_variant(Vs, Op, Residue) ->
	Base = ling_iops:broadest_packing(iops_tab:arg_types(Op)),
	case lists:keymember(Base, 2, Vs) of
	true ->
		Vs;
	false ->
		Vs ++ [{Residue,Base}]
	end.

variants(Starters, MustHaves, Threshold) ->
	variants(Starters, MustHaves, Threshold, []).

variants([], [], _, Acc) ->
	{lists:reverse(Acc),0.0};
variants(Starters, [S|MustHaves], Threshold, Acc) ->

	{NodeValues,NodeDescs,NodeFreqs} = pack_tree(Starters),

	case dict:find(S, NodeValues) of
	error ->
		variants(Starters, MustHaves, Threshold, [{0.0,S}|Acc]);

	_ ->
		Descs = dict:fetch(S, NodeDescs),
		Freq = dict:fetch(S, NodeFreqs),

		variants(lists:filter(fun({S1,_}) ->
					not lists:member(S1, Descs)
				 end, Starters),
				 MustHaves,
				 Threshold,
				 [{Freq,S}|Acc])
	end;

variants(Starters, [], Threshold, Acc) ->

	{NodeValues,NodeDescs,NodeFreqs} = pack_tree(Starters),

	SortedNodes = lists:keysort(2, dict:to_list(NodeValues)),
	case hd(lists:reverse(SortedNodes)) of
	{Node,Value} when Value > Threshold ->
		Descs = dict:fetch(Node, NodeDescs),
		Freq = dict:fetch(Node, NodeFreqs),

		variants(lists:filter(fun({S,_}) ->
					not lists:member(S, Descs)
				 end, Starters),
				 [],
				 Threshold,
			 	 [{Freq,Node}|Acc]);
	_ ->
		Residue = lists:sum([F || {_,F} <- Starters]),
		{lists:reverse(Acc),Residue}
	end.

pack_tree(SFs) ->
	pack_tree(SFs, dict:new(), dict:new(), dict:new()).

pack_tree(SFs, NVs0, NDs0, NFs0) ->
	lists:foldl(fun({S,F}, {NVs1,NDs1,NFs1}) ->
		MaxSize = length(S),
		Ds = generate(fun step/1, S),
		NVs = lists:foldl(fun(D, NVs) ->
			V = MaxSize - ling_iops:wsize(D),
			dict:update_counter(D, V*F, NVs)
		end, NVs1, Ds),

		NDs = lists:foldl(fun(D, NDs) ->
			dict:append_list(D, [S], NDs)
		end, NDs1, Ds),

		%%
		%% Calculate the variant frequency without regard to the space savings
		%% to sort variants in the proc_main()
		%%
		NFs = lists:foldl(fun(D, NFs) ->
			dict:update_counter(D, F, NFs)
		end, NFs1, Ds),

		{NVs,NDs,NFs}
	end, {NVs0,NDs0,NFs0}, SFs).

generate(_, []) ->
	[[]];
generate(F, [A|As]) ->
	[[G|Gs] || G <- F(A), Gs <- generate(F, As)].

step(label) -> [f];
step(float) -> [t];
step(literal) -> [t];
step({u8,_}=C) -> [C,u8];
step({u,N}=C) when is_integer(N), N >= 0, N =< 255 -> [C,u8,u32];
step({u,N}=C) when is_integer(N), N >= 0 -> [C,u32];
step({a,_}=C) -> [C,t];
step({smallint,I}=C) when I >= -128, I =< 127 -> [C,i8,t];
step({smallint,_}=C) -> [C,t];
step(bigint) -> [t];
step(nil=C) -> [C,t];
step({x,0}=C) -> [C,t];
step({x,X}=C) when X =< 255 -> [C,x8,t];	%% never r0
step({x,_}=C) -> [C,t];
step({y,Y}=C) when Y =< 255 -> [C,y8,t];
step({y,_}=C) -> [C,t];
step({fr,_}=C) -> [C,fr];
step({e,_}=C) -> [C,e];
step({b,_}=C) -> [C,b];
step(funentry) -> [fu];
step(string) -> [str];
step('catch') -> [t].

bifs_tab(BifTabFile) ->
	{ok,In} = file:open(BifTabFile, [read]),
	BifTab = bifs_tab_1(In, []),
	BifTab.

bifs_tab_1(In, Acc) ->
	case io:get_line(In, []) of
	eof -> Acc;
	"#" ++ _ -> bifs_tab_1(In, Acc);
	"\n" -> bifs_tab_1(In, Acc);
	Line ->
		{match,[_,M,F,As,Impl]} = 
			re:run(Line,
				"([^:]+):([^(]+)\\((.*)\\)[ \t]+(.*)",
				[{capture,all,list}]),
		As1 = if As =:= [] -> [];
			true -> re:split(As, ",", [{return,list}]) end,
		BifSpec = {list_to_atom(M),
				   list_to_atom(F),
				   lists:map(fun erlang:list_to_atom/1, As1),
				   Impl},
		bifs_tab_1(In, [BifSpec|Acc])
	end.

%%------------------------------------------------------------------------------

find_beams(BeamSrc) ->
	{ok,Bin} = file:read_file(BeamSrc),
	lists:foldl(fun("", Beams) ->
		Beams;
	("#" ++ _, Beams) -> %% comment
		Beams;
	("@" ++ Lib, Beams) -> %% OTP library
		case code:lib_dir(list_to_atom(Lib)) of
		{error,_} ->
			io:format("OTP library ~s not found\n", [Lib]),
			halt(1);
		Dir ->
			filelib:wildcard(filename:join([Dir,ebin,"*.beam"]))
				++ Beams
		end;
	(Name, Beams) -> %% file or dir
		case filelib:is_dir(Name) of
		true ->
			dive_dir(Name)
				++ Beams;
		false ->
			case filelib:is_file(Name) of
			true ->
				[Name|Beams];
			false ->
				io:format("~s not found\n", [Name]),
				halt(1)
			end
		end
	end, [], re:split(Bin, "\n", [{return,list},trim])).

dive_dir(Dir) ->
	dive_dir([Dir], []).

dive_dir([], Acc) ->
	lists:concat(Acc);
dive_dir([Dir|Dirs], Acc) ->
	MoreDirs = lists:filter(fun filelib:is_dir/1,
		filelib:wildcard(filename:join(Dir, "*"))),
	Beams = filelib:wildcard(filename:join(Dir, "*.beam")),
	dive_dir(MoreDirs ++ Dirs, [Beams|Acc]).

%%------------------------------------------------------------------------------
%%
%% Variant code sizes - approximate increase of emulator code when a new variant
%% of an instruction gets introduced.
%%
%% Generated using vcs_gen
%% 395 variants
%% 10/88/409 min/avg/max size

vcs(allocate_heap) -> 123;
vcs(allocate_heap_zero) -> 306;
vcs(allocate_init) -> 102;
vcs(apply) -> 252;
vcs(apply_last) -> 289;
vcs(badmatch) -> 33;
vcs(bif1_body) -> 97;
vcs(bif2_body) -> 99;
vcs(bs_context_to_binary) -> 39;
vcs(bs_init_writable) -> 105;
vcs(call_bif) -> 122;
vcs(case_end) -> 60;
vcs(catch_end) -> 55;
vcs(deallocate_return) -> 36;
vcs(extract_next_element) -> 43;
vcs(extract_next_element2) -> 54;
vcs(extract_next_element3) -> 66;
vcs(fclearerror) -> 11;
vcs(fconv) -> 107;
vcs(fmove_1) -> 90;
vcs(fmove_2) -> 106;
vcs(func_info) -> 122;
vcs(get_list) -> 82;
vcs(get_tuple_element) -> 79;
vcs(if_end) -> 59;
vcs(init) -> 24;
vcs(init2) -> 57;
vcs(init3) -> 81;
vcs(int_code_end) -> 50;
vcs(is_atom) -> 55;
vcs(is_bigint) -> 55;
vcs(is_binary) -> 55;
vcs(is_bitstr) -> 55;
vcs(is_boolean) -> 63;
vcs(is_float) -> 55;
vcs(is_function) -> 55;
vcs(is_function2) -> 82;
vcs(is_integer) -> 69;
vcs(is_integer_allocate) -> 69;
vcs(is_list) -> 48;
vcs(is_nil) -> 32;
vcs(is_nonempty_list) -> 30;
vcs(is_nonempty_list_allocate) -> 39;
vcs(is_nonempty_list_test_heap) -> 24;
vcs(is_number) -> 69;
vcs(is_pid) -> 69;
vcs(is_port) -> 69;
vcs(is_reference) -> 55;
vcs(is_tuple) -> 44;
vcs(is_tuple_of_arity) -> 41;
vcs(jump) -> 10;
vcs(l_allocate) -> 68;
vcs(l_allocate_zero) -> 125;
vcs(l_apply) -> 290;
vcs(l_apply_fun) -> 140;
vcs(l_apply_fun_last) -> 202;
vcs(l_apply_fun_only) -> 241;
vcs(l_apply_last) -> 222;
vcs(l_apply_only) -> 196;
vcs(l_band) -> 154;
vcs(l_bif1) -> 114;
vcs(l_bif2) -> 99;
vcs(l_bor) -> 86;
vcs(l_bs_add) -> 136;
vcs(l_bs_append) -> 272;
vcs(l_bs_get_binary2) -> 135;
vcs(l_bs_get_binary_all2) -> 110;
vcs(l_bs_get_binary_all_reuse) -> 97;
vcs(l_bs_get_binary_imm2) -> 103;
vcs(l_bs_get_float2) -> 288;
vcs(l_bs_get_integer) -> 111;
vcs(l_bs_get_integer_16) -> 95;
vcs(l_bs_get_integer_32) -> 101;
vcs(l_bs_get_integer_8) -> 95;
vcs(l_bs_get_integer_imm) -> 98;
vcs(l_bs_get_integer_small_imm) -> 114;
vcs(l_bs_get_utf16) -> 204;
vcs(l_bs_get_utf8) -> 184;
vcs(l_bs_init) -> 273;
vcs(l_bs_init_bits) -> 333;
vcs(l_bs_init_bits_fail) -> 381;
vcs(l_bs_init_fail) -> 409;
vcs(l_bs_init_heap_bin) -> 203;
vcs(l_bs_match_string) -> 99;
vcs(l_bs_private_append) -> 200;
vcs(l_bs_put_string) -> 67;
vcs(l_bs_put_utf16) -> 82;
vcs(l_bs_put_utf8) -> 75;
vcs(l_bs_restore2) -> 111;
vcs(l_bs_save2) -> 111;
vcs(l_bs_skip_bits2) -> 193;
vcs(l_bs_skip_bits_all2) -> 112;
vcs(l_bs_skip_bits_imm2) -> 110;
vcs(l_bs_start_match2) -> 47;
vcs(l_bs_test_tail_imm2) -> 95;
vcs(l_bs_test_unit) -> 106;
vcs(l_bs_test_unit_8) -> 90;
vcs(l_bs_test_zero_tail2) -> 88;
vcs(l_bs_utf16_size) -> 101;
vcs(l_bs_utf8_size) -> 101;
vcs(l_bs_validate_unicode) -> 67;
vcs(l_bs_validate_unicode_retract) -> 93;
vcs(l_bsl) -> 158;
vcs(l_bsr) -> 158;
vcs(l_bxor) -> 90;
vcs(l_call) -> 42;
vcs(l_call_ext) -> 62;
vcs(l_call_ext_last) -> 80;
vcs(l_call_ext_only) -> 54;
vcs(l_call_fun) -> 103;
vcs(l_call_fun_last) -> 191;
vcs(l_call_last) -> 57;
vcs(l_call_only) -> 31;
vcs(l_catch) -> 65;
vcs(l_element) -> 168;
vcs(l_fadd) -> 118;
vcs(l_fast_element) -> 84;
vcs(l_fcheckerror) -> 11;
vcs(l_fdiv) -> 136;
vcs(l_fetch) -> 60;
vcs(l_fmul) -> 118;
vcs(l_fnegate) -> 51;
vcs(l_fsub) -> 118;
vcs(l_gc_bif1) -> 179;
vcs(l_gc_bif2) -> 178;
vcs(l_gc_bif3) -> 217;
vcs(l_get) -> 82;
vcs(l_increment) -> 136;
vcs(l_int_bnot) -> 99;
vcs(l_int_div) -> 67;
vcs(l_is_eq) -> 52;
vcs(l_is_eq_exact) -> 52;
vcs(l_is_eq_exact_immed) -> 34;
vcs(l_is_eq_exact_literal) -> 97;
vcs(l_is_function2) -> 59;
vcs(l_is_ge) -> 56;
vcs(l_is_lt) -> 56;
vcs(l_is_ne) -> 97;
vcs(l_is_ne_exact) -> 100;
vcs(l_is_ne_exact_immed) -> 48;
vcs(l_is_ne_exact_literal) -> 108;
vcs(l_jump_on_val) -> 51;
vcs(l_loop_rec) -> 59;
vcs(l_m_div) -> 169;
vcs(l_make_export) -> 117;
vcs(l_make_fun) -> 276;
vcs(l_minus) -> 136;
vcs(l_move_call) -> 75;
vcs(l_move_call_ext) -> 98;
vcs(l_move_call_ext_last) -> 115;
vcs(l_move_call_ext_only) -> 85;
vcs(l_move_call_last) -> 81;
vcs(l_move_call_only) -> 44;
vcs(l_new_bs_put_binary) -> 168;
vcs(l_new_bs_put_binary_all) -> 119;
vcs(l_new_bs_put_binary_imm) -> 77;
vcs(l_new_bs_put_float) -> 254;
vcs(l_new_bs_put_float_imm) -> 152;
vcs(l_new_bs_put_integer) -> 196;
vcs(l_new_bs_put_integer_imm) -> 123;
vcs(l_plus) -> 123;
vcs(l_put_tuple) -> 133;
vcs(l_recv_set) -> 50;
vcs(l_rem) -> 67;
vcs(l_select_tuple_arity) -> 51;
vcs(l_select_tuple_arity2) -> 42;
vcs(l_select_val2) -> 55;
vcs(l_select_val_atoms) -> 178;
vcs(l_select_val_smallints) -> 134;
vcs(l_times) -> 142;
vcs(l_trim) -> 25;
vcs(l_wait_timeout) -> 24;
vcs(l_yield) -> 60;
vcs(loop_rec_end) -> 43;
vcs(move) -> 46;
vcs(move2) -> 77;
vcs(move_deallocate_return) -> 56;
vcs(move_jump) -> 40;
vcs(move_return) -> 37;
vcs(node) -> 54;
vcs(on_load) -> 62;
vcs(put_list) -> 69;
vcs(raise) -> 91;
vcs(recv_mark) -> 51;
vcs(remove_message) -> 69;
vcs(return) -> 27;
vcs(self) -> 57;
vcs(send) -> 272;
vcs(set_tuple_element) -> 72;
vcs(system_limit) -> 22;
vcs(test_arity) -> 59;
vcs(test_heap) -> 39;
vcs(test_heap_1_put_list) -> 68;
vcs(timeout) -> 47;
vcs(try_case_end) -> 48;
vcs(try_end) -> 67;
vcs(wait) -> 65;
vcs(wait_timeout) -> 193;
vcs(_) -> 88.

%%EOF
